5. Counting Objects in Each Direction#
Import Libraries#
import pandas as pd
import geopandas as gpd
Reading data#
data_cleaned = pd.read_csv('data/1_data_cleaned.csv')
lines_gdf_stat = gpd.read_file('data/2_lines_gdf_stat.gpkg')
points_gdf = gpd.GeoDataFrame(data_cleaned, geometry=gpd.points_from_xy(data_cleaned['lon'], data_cleaned['lat']), crs=4326)
lines_gdf_cars = lines_gdf_stat[lines_gdf_stat['object_type'] =='CAR']
lines_gdf_pedestrians = lines_gdf_stat[lines_gdf_stat['object_type'] =='PEDESTRIAN']
lines_gdf_two_wheelers = lines_gdf_stat[lines_gdf_stat['object_type'] =='CYCLIST']
Defining directions and calculating number of cars in each direction. Option 1#
Creating Starting points#
starting_points = points_gdf.groupby('object_id').first().reset_index().set_crs(epsg=4326)
starting_points.head()
| object_id | Unnamed: 0 | timestamp | heading | height | width | length | v | tracking_status | object_type | lon | lat | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 152997118 | 0 | 1712062811083 | 132.072 | 0.735 | 1.942 | 4.387 | 0.03 | TRACKING | CAR | 13.064405 | 47.810136 | POINT (13.06440 47.81014) |
| 1 | 152997181 | 2802 | 1712062820184 | 198.052 | 1.458 | 0.641 | 0.366 | 1.10 | TRACKING | PEDESTRIAN | 13.063991 | 47.810058 | POINT (13.06399 47.81006) |
| 2 | 152997182 | 4156 | 1712062820083 | 319.543 | 1.690 | 1.916 | 4.555 | 0.02 | TRACKING | CAR | 13.064130 | 47.810046 | POINT (13.06413 47.81005) |
| 3 | 152997183 | 6803 | 1712062822387 | 103.052 | 0.539 | 1.923 | 4.550 | 1.92 | TRACKING | CAR | 13.063387 | 47.809774 | POINT (13.06339 47.80977) |
| 4 | 152997184 | 7812 | 1712062824786 | 85.395 | 1.207 | 1.981 | 4.367 | 2.18 | TRACKING | CAR | 13.063389 | 47.809776 | POINT (13.06339 47.80978) |
starting_points.explore(tiles='Esri.WorldImagery')
Make this Notebook Trusted to load map: File -> Trust Notebook
ending_points = points_gdf.groupby('object_id').last().reset_index().set_crs(epsg=4326)
ending_points.head()
| object_id | Unnamed: 0 | timestamp | heading | height | width | length | v | tracking_status | object_type | lon | lat | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 152997118 | 2780 | 1712063097385 | 313.127 | 0.348 | 1.955 | 4.500 | 0.04 | TRACKING | CAR | 13.064403 | 47.810137 | POINT (13.06440 47.81014) |
| 1 | 152997181 | 4145 | 1712062958884 | 133.052 | 0.528 | 0.645 | 0.209 | 0.75 | TRACKING | PEDESTRIAN | 13.064302 | 47.809506 | POINT (13.06430 47.80951) |
| 2 | 152997182 | 6780 | 1712063090784 | 139.865 | 0.119 | 1.883 | 4.461 | 0.03 | TRACKING | CAR | 13.064130 | 47.810045 | POINT (13.06413 47.81005) |
| 3 | 152997183 | 7790 | 1712062924186 | 23.052 | 0.017 | 1.880 | 4.287 | 10.95 | TRACKING | CAR | 13.064855 | 47.810200 | POINT (13.06485 47.81020) |
| 4 | 152997184 | 8793 | 1712062925683 | 33.052 | 0.596 | 1.959 | 4.330 | 8.31 | TRACKING | CAR | 13.064773 | 47.810138 | POINT (13.06477 47.81014) |
ending_points.explore(tiles='Esri.WorldImagery')
Make this Notebook Trusted to load map: File -> Trust Notebook
CARS Directions#
def create_regular_grid(gdf, square_size):
#calculating UTM zone for the data
utm_zone = gdf.estimate_utm_crs()
#перепроецируем набор данных
gdf = gdf.to_crs(utm_zone)
minX, minY, maxX, maxY = gdf.total_bounds
grid_cells = []
x, y = minX, minY
while y <= maxY:
while x <= maxX:
geom = Polygon([(x, y), (x, y + square_size), (x + square_size, y + square_size), (x + square_size, y), (x, y)])
grid_cells.append(geom)
x += square_size
x = minX
y += square_size
fishnet = gpd.GeoDataFrame(geometry=grid_cells, crs=utm_zone)
fishnet['grid_id'] = range(len(grid_cells))
fishnet = fishnet.to_crs(epsg=4326)
return fishnet
startingPointsCars = starting_points[starting_points['object_type']=="CAR"]
endingPointsCars = ending_points[ending_points['object_type']=="CAR"]
startEndPointsCars = pd.concat([startingPointsCars, endingPointsCars], ignore_index=True)
grid = create_regular_grid(startEndPointsCars, 7.5)
grid.explore(tiles='Esri.WorldImagery')
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[8], line 5
2 endingPointsCars = ending_points[ending_points['object_type']=="CAR"]
4 startEndPointsCars = pd.concat([startingPointsCars, endingPointsCars], ignore_index=True)
----> 5 grid = create_regular_grid(startEndPointsCars, 7.5)
6 grid.explore(tiles='Esri.WorldImagery')
Cell In[7], line 14, in create_regular_grid(gdf, square_size)
12 while y <= maxY:
13 while x <= maxX:
---> 14 geom = Polygon([(x, y), (x, y + square_size), (x + square_size, y + square_size), (x + square_size, y), (x, y)])
15 grid_cells.append(geom)
16 x += square_size
NameError: name 'Polygon' is not defined
points_grid = gpd.sjoin(startEndPointsCars, grid, predicate='within')
points_count = points_grid.groupby('grid_id').size().reset_index(name='points_count')
grid_with_count = grid.merge(points_count, on='grid_id', how='left')
grid_with_count.head()
| geometry | grid_id | points_count | |
|---|---|---|---|
| 0 | POLYGON ((13.06309 47.80930, 13.06309 47.80937... | 0 | NaN |
| 1 | POLYGON ((13.06319 47.80930, 13.06319 47.80937... | 1 | NaN |
| 2 | POLYGON ((13.06329 47.80930, 13.06329 47.80937... | 2 | NaN |
| 3 | POLYGON ((13.06339 47.80930, 13.06339 47.80937... | 3 | NaN |
| 4 | POLYGON ((13.06349 47.80931, 13.06349 47.80937... | 4 | NaN |
grid_main_clusters = grid_with_count[grid_with_count['points_count']>25]
grid_with_count.explore(column='points_count', tiles='Esri.WorldImagery')
Make this Notebook Trusted to load map: File -> Trust Notebook
Create Clusters#
Function to find clusters of polygons based on spatial relationships (touches or intersects:
def find_polygon_clusters(gdf):
# Ensure geometries are valid
gdf = gdf[gdf.is_valid]
# Initialize a dictionary to store cluster names
cluster_names = {}
# Initialize an empty list to store groups of neighboring polygons
polygon_groups = []
# Counter for cluster names
cluster_counter = 1
# Loop through each polygon and find its neighbors
for idx, polygon in gdf.iterrows():
if idx not in cluster_names:
cluster_names[idx] = f'Cluster{cluster_counter}'
cluster_counter += 1
# Find neighboring polygons (e.g., polygons that touch or intersect)
neighbors = gdf[gdf.geometry.touches(polygon['geometry']) | gdf.geometry.intersects(polygon['geometry'])]
# Check if the polygon and its neighbors form a MultiPolygon
if len(neighbors) > 0:
merged_geometry = MultiPolygon([polygon['geometry']] + list(neighbors['geometry']))
polygon_groups.append(merged_geometry)
# Assign the same cluster name to all neighbors
for neighbor_idx in neighbors.index:
if neighbor_idx not in cluster_names:
cluster_names[neighbor_idx] = cluster_names[idx]
# Assign cluster names to the GeoDataFrame
gdf['cluster'] = gdf.index.map(cluster_names)
return gdf
cars_clusters = find_polygon_clusters(grid_main_clusters)
cars_clusters .explore(column='cluster', tiles='Esri.WorldImagery')
Make this Notebook Trusted to load map: File -> Trust Notebook
startingPointsCars_CLUSTER = gpd.sjoin(startingPointsCars.to_crs(cars_clusters .crs), cars_clusters , predicate='within')
endingPointsCars_CLUSTER = gpd.sjoin(endingPointsCars.to_crs(cars_clusters .crs), cars_clusters , predicate='within')
startingPointsCars_CLUSTER['start_cluster'] = startingPointsCars_CLUSTER['cluster']
endingPointsCars_CLUSTER['end_cluster'] = endingPointsCars_CLUSTER['cluster']
startingPointsCars_CLUSTER.explore(column="cluster",tiles='Esri.WorldImagery')
Make this Notebook Trusted to load map: File -> Trust Notebook
endingPointsCars_CLUSTER.explore(column="cluster", tiles='Esri.WorldImagery')
Make this Notebook Trusted to load map: File -> Trust Notebook
Getting Directions
lines_gdf_cars = lines_gdf_cars.merge(startingPointsCars_CLUSTER[['object_id','start_cluster']], on="object_id", how="left")
lines_gdf_cars = lines_gdf_cars.merge(endingPointsCars_CLUSTER[['object_id','end_cluster']], on="object_id", how="left")
lines_gdf_cars.groupby(['start_cluster', 'end_cluster']).size()
start_cluster end_cluster
Cluster1 Cluster1 36
Cluster2 2
Cluster3 14
Cluster4 140
Cluster2 Cluster1 118
Cluster2 66
Cluster3 383
Cluster4 114
Cluster3 Cluster1 31
Cluster2 490
Cluster3 298
Cluster4 260
Cluster4 Cluster1 118
Cluster2 249
Cluster3 248
Cluster4 922
Cluster5 Cluster5 18
dtype: int64
Defining directions and calculating number of cars in each direction. Option 2#
The previous approach unfortunalty can’t be applied for pedestrians and two-wheelers as their routes are less clear distingishable and not strait. So, for pedestrians and two-wheelers we will define their directions baes on the intersection with crosslines created by us in QGIS
Defining directions and calculating number of cars in each direction. Option 3#
This approach is very easy to implement but it is less precise in comparison to the previous two. We have started with it and want to show it anyway as it was a part of our work and it still shows some overall information about directions
Calculating amount of objects in different directions for cars#
cars_heading_breaks = jenkspy.jenks_breaks(lines_gdf_cars['avg_heading'], 5)
# Создаем столбец для класса направления на основе разрывов
lines_gdf_cars['direction_class'] = pd.cut(lines_gdf_cars['avg_heading'], bins=cars_heading_breaks, labels=False, include_lowest=True)
# Считаем количество объектов в каждом классе направления
direction_class_counts = lines_gdf_cars['direction_class'].value_counts().sort_index()
print("Breaks:", cars_heading_breaks)
print("Direction Class Counts:\n", direction_class_counts)
Breaks: [8.052, 70.552, 158.05200000000002, 236.827, 276.276, 353.052]
Direction Class Counts:
0 642
1 1072
2 1361
3 641
4 462
Name: direction_class, dtype: int64
# Creating Histogram
sns.histplot(data=lines_gdf_cars, x='avg_heading', hue='direction_class', bins=20, alpha=0.8, palette='BuPu', edgecolor='lightgray')
# Removing graph border
sns.despine()
# Adding titlle and axis names
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.title('Histogram, bins colored by class')
# Showing graph
plt.show()
# Convert angles from degrees to radians for the polar plot
lines_gdf_cars['avg_heading_rad'] = np.deg2rad(lines_gdf_cars['avg_heading'])
# Create the polar plot
plt.figure(figsize=(10, 6))
ax = plt.subplot(111, projection='polar')
# Configure the polar plot: 0 degrees at the top and clockwise direction
ax.set_theta_zero_location('N')
ax.set_theta_direction(-1)
# Define the number of bins
num_bins = 8
bins = np.linspace(0, 2 * np.pi, num_bins + 1)
# Count the number of objects in each bin
hist, edges = np.histogram(lines_gdf_cars['avg_heading_rad'], bins=bins)
# Plot the histogram on the polar plot
bars = ax.bar(edges[:-1], hist, width=np.diff(edges), align='edge', edgecolor='black')
# Set angle labels in degrees
ax.set_xticks(np.deg2rad(np.arange(0, 360, 45)))
ax.set_xticklabels(['0°', '45°', '90°', '135°', '180°', '225°', '270°', '315°'])
# Add title
plt.title('Distribution of Objects by Avg Heading')
# Display the plot
plt.show()
Calculating amount of objects in different directions for pedestrians#
pedestrians_heading_breaks = jenkspy.jenks_breaks(lines_gdf_pedestrians['avg_heading'], 5)
# Создаем столбец для класса направления на основе разрывов
lines_gdf_pedestrians['direction_class'] = pd.cut(lines_gdf_pedestrians['avg_heading'], bins=pedestrians_heading_breaks, labels=False, include_lowest=True)
# Считаем количество объектов в каждом классе направления
direction_class_counts = lines_gdf_pedestrians['direction_class'].value_counts().sort_index()
print("Breaks:", pedestrians_heading_breaks)
print("Direction Class Counts:\n", direction_class_counts)
Breaks: [3.052, 89.59200000000001, 160.552, 228.439, 285.552, 358.052]
Direction Class Counts:
0 388
1 371
2 411
3 358
4 241
Name: direction_class, dtype: int64
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/geopandas/geodataframe.py:1538: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
super().__setitem__(key, value)
# Creating Histogram
sns.histplot(data=lines_gdf_pedestrians, x='avg_heading', hue='direction_class', bins=20, alpha=0.8, palette='BuPu', edgecolor='lightgray')
# Removing graph border
sns.despine()
# Adding titlle and axis names
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.title('Histogram, bins colored by class')
# Showing graph
plt.show()
# Convert angles from degrees to radians for the polar plot
lines_gdf_pedestrians['avg_heading_rad'] = np.deg2rad(lines_gdf_pedestrians['avg_heading'])
# Create the polar plot
plt.figure(figsize=(10, 6))
ax = plt.subplot(111, projection='polar')
# Configure the polar plot: 0 degrees at the top and clockwise direction
ax.set_theta_zero_location('N')
ax.set_theta_direction(-1)
# Define the number of bins
num_bins = 8
bins = np.linspace(0, 2 * np.pi, num_bins + 1)
# Count the number of objects in each bin
hist, edges = np.histogram(lines_gdf_pedestrians['avg_heading_rad'], bins=bins)
# Plot the histogram on the polar plot
bars = ax.bar(edges[:-1], hist, width=np.diff(edges), align='edge', edgecolor='black')
# Set angle labels in degrees
ax.set_xticks(np.deg2rad(np.arange(0, 360, 45)))
ax.set_xticklabels(['0°', '45°', '90°', '135°', '180°', '225°', '270°', '315°'])
# Add title
plt.title('Distribution of Objects by Avg Heading')
# Display the plot
plt.show()
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/geopandas/geodataframe.py:1538: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
super().__setitem__(key, value)
Calculating amount of objects in different directions for two-wheelers#
two_wheelers_heading_breaks = jenkspy.jenks_breaks(lines_gdf_two_wheelers['avg_heading'], 5)
# Создаем столбец для класса направления на основе разрывов
lines_gdf_two_wheelers['direction_class'] = pd.cut(lines_gdf_two_wheelers['avg_heading'], bins=two_wheelers_heading_breaks, labels=False, include_lowest=True)
# Считаем количество объектов в каждом классе направления
direction_class_counts = lines_gdf_two_wheelers['direction_class'].value_counts().sort_index()
print("Breaks:", cars_heading_breaks)
print("Direction Class Counts:\n", direction_class_counts)
Breaks: [8.052, 70.552, 158.05200000000002, 236.827, 276.276, 353.052]
Direction Class Counts:
0 43
1 66
2 59
3 45
4 37
Name: direction_class, dtype: int64
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/geopandas/geodataframe.py:1538: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
super().__setitem__(key, value)
# Creating Histogram
sns.histplot(data=lines_gdf_cars, x='avg_heading', hue='direction_class', bins=20, alpha=0.8, palette='BuPu', edgecolor='lightgray')
# Removing graph border
sns.despine()
# Adding titlle and axis names
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.title('Histogram, bins colored by class')
# Showing graph
Text(0.5, 1.0, 'Histogram, bins colored by class')
# Convert angles from degrees to radians for the polar plot
lines_gdf_cars['avg_heading_rad'] = np.deg2rad(lines_gdf_cars['avg_heading'])
# Create the polar plot
plt.figure(figsize=(10, 6))
ax = plt.subplot(111, projection='polar')
# Configure the polar plot: 0 degrees at the top and clockwise direction
ax.set_theta_zero_location('N')
ax.set_theta_direction(-1)
# Define the number of bins
num_bins = 8
bins = np.linspace(0, 2 * np.pi, num_bins + 1)
# Count the number of objects in each bin
hist, edges = np.histogram(lines_gdf_cars['avg_heading_rad'], bins=bins)
# Plot the histogram on the polar plot
bars = ax.bar(edges[:-1], hist, width=np.diff(edges), align='edge', edgecolor='black')
# Set angle labels in degrees
ax.set_xticks(np.deg2rad(np.arange(0, 360, 45)))
ax.set_xticklabels(['0°', '45°', '90°', '135°', '180°', '225°', '270°', '315°'])
# Add title
plt.title('Distribution of Objects by Avg Heading')
# Display the plot
plt.show()